/*** This do-file uses LODES raw origin-destination data to bin ZCTAs into 
quartiles of workplace rent.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Create required subfolders
cap mkdir "${root}/data/derived/LODES"

project, uses("${root}/data/dvc/LODES/source.yaml") reference
project, uses("${root}/data/dvc/LODES/LODESTechDoc7.5.pdf") reference

*-------------------------------------------------------------------------------
* Clean LODES crosswalk data
*-------------------------------------------------------------------------------

project, uses("${root}/data/dvc/LODES/xwalk/us_xwalk.csv.gz")
project, uses("${root}/code/ado/gzimport.ado") raw
gzimport delimited using "${root}/data/dvc/LODES/xwalk/us_xwalk.csv.gz", clear

tostring tabblk2010, replace format(%015.0f)
keep tabblk2010 st stusps stname zcta
drop if zcta == 99999

* State abbreviations for the 50 states + DC
replace stusps = strlower(stusps) 
glevelsof stusps if inrange(st, 1, 56), local(states)
assert r(J) == 51

save "${root}/data/derived/LODES/us_xwalk.dta", replace
project, creates("${root}/data/derived/LODES/us_xwalk.dta")

*-------------------------------------------------------------------------------
* Import csvs for each state and combine --> save as dta file
*-------------------------------------------------------------------------------
project, uses("${root}/code/ado/gzimport.ado") raw

foreach state of local states {
	
	* Import auxillary csv data for each state and save as tempfile	
	* Prioritize 2017 over 2016, and return an error if neither are present (it's fine if just one is present)
	cap project, uses("${root}/data/dvc/LODES/od/`state'_od_aux_JT01_2017.csv.gz")
	cap confirm file "${root}/data/dvc/LODES/od/`state'_od_aux_JT01_2017.csv.gz"
	
	if _rc == 0 {
		gzimport delimited using "${root}/data/dvc/LODES/od/`state'_od_aux_JT01_2017.csv.gz", clear
	}
	
	else {
		project, uses("${root}/data/dvc/LODES/od/`state'_od_aux_JT01_2016.csv.gz")
		gzimport delimited using "${root}/data/dvc/LODES/od/`state'_od_aux_JT01_2016.csv.gz", clear
	}
	
	tempfile aux_data 
	save `aux_data', replace
	
	* Import main csv data for each state and save as tempfile	
	* Prioritize 2017 over 2016, and return an error if neither are present (it's fine if just one is present)
	cap project, uses("${root}/data/dvc/LODES/od/`state'_od_main_JT01_2017.csv.gz")
	cap confirm file "${root}/data/dvc/LODES/od/`state'_od_main_JT01_2017.csv.gz"
	
	if _rc == 0 {
		gzimport delimited using "${root}/data/dvc/LODES/od/`state'_od_main_JT01_2017.csv.gz", clear
	}
	
	else {
		project, uses("${root}/data/dvc/LODES/od/`state'_od_main_JT01_2016.csv.gz")
		gzimport delimited using "${root}/data/dvc/LODES/od/`state'_od_main_JT01_2016.csv.gz", clear
	}
	
	* Append main and auxillary files
	append using `aux_data' 
	
	* Merge on workplace ZIP 
	gen tabblk2010 = string(w_geocode, "%015.0f")
	project, uses("${root}/data/derived/LODES/us_xwalk.dta")
	merge m:1 tabblk2010 using "${root}/data/derived/LODES/us_xwalk.dta", nogen keep(match master) keepusing(zcta)
	rename zcta zcta_work
	drop tabblk2010
	
	* Merge on home ZIP 
	gen tabblk2010 = string(h_geocode, "%015.0f")
	project, uses("${root}/data/derived/LODES/us_xwalk.dta")
	merge m:1 tabblk2010 using "${root}/data/derived/LODES/us_xwalk.dta", nogen keep(match master) keepusing(zcta)
	rename zcta zcta_home
	
	gcollapse (rawsum) se01 se03, by(zcta_home zcta_work)
	
	tempfile flows_`state' 
	save `flows_`state''
}
	
* Combine all states 
clear 	
foreach state of local states {
	append using `flows_`state'' 
}

* Combine counts for a few zctas that cross across states: 
gcollapse (rawsum) se01 se03, by(zcta_home zcta_work)

* Rename variables and save
rename (se01 se03)(low_pay_jobs high_pay_jobs)
lab var low_pay_jobs "(rawsum) se01 by home-work ZCTA pairs"
lab var high_pay_jobs "(rawsum) se03 by home-work ZCTA pairs"
save "${root}/data/derived/LODES/job_flows_residence_origin_zctas.dta", replace
project, creates("${root}/data/derived/LODES/job_flows_residence_origin_zctas.dta")

*-------------------------------------------------------------------------------
* Compute median workplace rent for low-wage jobs in each ZCTA
*-------------------------------------------------------------------------------

* Load job flows from home to work ZCTAs
project, uses("${root}/data/derived/LODES/job_flows_residence_origin_zctas.dta")
use "${root}/data/derived/LODES/job_flows_residence_origin_zctas.dta", clear	

* Merge on rent in each work ZCTA  
rename zcta_work zcta
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge m:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", nogen keep(match) keepusing(med_2br_2014_2018_est)

* Collapse by home ZCTA 
gcollapse (rawsum) low_pay_jobs (mean) med_2br_rent_work_zcta = med_2br_2014_2018_est [w = low_pay_jobs], by(zcta_home) 
rename zcta_home zcta 

* Population of home ZCTA
project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge 1:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", nogen keep(match) keepusing(pop_2014_2018_est)
rename pop_2014_2018_est pop_2018

* Quartiles of work ZCTA rent, weighted by population of home ZCTAs
gquantiles work_rent_quartile = med_2br_rent_work_zcta [w = pop_2018], xtile nq(4)

lab var zcta "Home ZCTA"
lab var med_2br_rent_work_zcta "Mean of ACS 14-18 median 2-bedroom rent in ZCTAs of workplace"
lab var low_pay_jobs "(rawsum) se01 by home ZCTA"
lab var pop_2018 "Pop of home ZCTA"
lab var work_rent_quartile "Quartile of workplace ZCTA rent across home ZCTAs"

save "${root}/data/derived/LODES/ZCTA mean work rent low_pay_jobs.dta", replace
project, creates("${root}/data/derived/LODES/ZCTA mean work rent low_pay_jobs.dta")

*-------------------------------------------------------------------------------
* Compute log density of high-wage jobs in each ZCTA
*-------------------------------------------------------------------------------

project, uses("${root}/data/derived/LODES/job_flows_residence_origin_zctas.dta")
use "${root}/data/derived/LODES/job_flows_residence_origin_zctas.dta", clear	

gcollapse (rawsum) high_pay_jobs, by(zcta_home)
rename zcta_home zcta 

project, uses("${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta")
merge 1:1 zcta using "${root}/data/derived/ACS 2014-2018 5-Year ZCTA/ACS 2014-2018 ZCTA.dta", nogen keep(match) keepusing(zarealand)

gen zarealand_sqm = zarealand / (1609.37 ^ 2)
gen density_high_pay_jobs = high_pay_jobs / zarealand_sqm
gen log_density_high_pay_jobs = log(density_high_pay_jobs)

save "${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta", replace
project, creates("${root}/data/derived/LODES/ZCTA log density high_pay_jobs.dta")
